{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read in data，数据在xgboost安装的路径下的demo目录,现在我们将其copy到当前代码下的data目录\n",
    "dpath = './data/'\n",
    "train = pd.read_csv(dpath + \"RentListingInquries_FE_train.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train = train[\"interest_level\"]\n",
    "X_train = train.drop(\"interest_level\", axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# prepare cross validation\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': [6, 7, 8], 'min_child_weight': [5, 6, 7, 9]}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#max_depth 建议3-10， min_child_weight=1／sqrt(ratio_rare_event) =5.5\n",
    "#第一次  Best: -0.588659 using {'max_depth': 7, 'min_child_weight': 5}\n",
    "#第二次 Best: -0.588350 using {'max_depth': 6, 'min_child_weight': 5}\n",
    "#max_depth = range(3,10,2)\n",
    "#min_child_weight = range(1,6,2)\n",
    "max_depth = [6,7,8]\n",
    "min_child_weight = [5,6,7,9]\n",
    "param_test2_1 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "param_test2_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4.51343\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "starttime = time.clock()\n",
    "print(starttime)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/model_selection/_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58835, std: 0.00340, params: {'max_depth': 6, 'min_child_weight': 5},\n",
       "  mean: -0.58864, std: 0.00359, params: {'max_depth': 6, 'min_child_weight': 6},\n",
       "  mean: -0.58916, std: 0.00331, params: {'max_depth': 6, 'min_child_weight': 7},\n",
       "  mean: -0.58884, std: 0.00429, params: {'max_depth': 6, 'min_child_weight': 9},\n",
       "  mean: -0.58866, std: 0.00500, params: {'max_depth': 7, 'min_child_weight': 5},\n",
       "  mean: -0.58850, std: 0.00443, params: {'max_depth': 7, 'min_child_weight': 6},\n",
       "  mean: -0.58928, std: 0.00360, params: {'max_depth': 7, 'min_child_weight': 7},\n",
       "  mean: -0.58938, std: 0.00364, params: {'max_depth': 7, 'min_child_weight': 9},\n",
       "  mean: -0.59117, std: 0.00493, params: {'max_depth': 8, 'min_child_weight': 5},\n",
       "  mean: -0.59101, std: 0.00392, params: {'max_depth': 8, 'min_child_weight': 6},\n",
       "  mean: -0.58971, std: 0.00379, params: {'max_depth': 8, 'min_child_weight': 7},\n",
       "  mean: -0.59001, std: 0.00405, params: {'max_depth': 8, 'min_child_weight': 9}],\n",
       " {'max_depth': 6, 'min_child_weight': 5},\n",
       " -0.5883498705462652)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb2_1 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=205,  #第一轮参数调整得到的n_estimators最优值\n",
    "        max_depth=5,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.3,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "\n",
    "gsearch2_1 = GridSearchCV(xgb2_1, param_grid = param_test2_1, scoring='neg_log_loss',n_jobs=-1, cv=kfold)\n",
    "gsearch2_1.fit(X_train , y_train)\n",
    "\n",
    "gsearch2_1.grid_scores_, gsearch2_1.best_params_,     gsearch2_1.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "247.710825\n",
      "243.197395\n"
     ]
    }
   ],
   "source": [
    "endtime = time.clock()\n",
    "print(endtime)\n",
    "print (endtime - starttime)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best: -0.588350 using {'max_depth': 6, 'min_child_weight': 5}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/deprecation.py:122: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/deprecation.py:122: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/deprecation.py:122: FutureWarning: You are accessing a training score ('split0_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/deprecation.py:122: FutureWarning: You are accessing a training score ('split1_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/deprecation.py:122: FutureWarning: You are accessing a training score ('split2_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/deprecation.py:122: FutureWarning: You are accessing a training score ('split3_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/home/lyp/.local/lib/python3.6/site-packages/sklearn/utils/deprecation.py:122: FutureWarning: You are accessing a training score ('split4_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    }
   ],
   "source": [
    "# summarize results\n",
    "print(\"Best: %f using %s\" % (gsearch2_1.best_score_, gsearch2_1.best_params_))\n",
    "test_means = gsearch2_1.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = gsearch2_1.cv_results_[ 'std_test_score' ]\n",
    "train_means = gsearch2_1.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = gsearch2_1.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "pd.DataFrame(gsearch2_1.cv_results_).to_csv('my_preds_maxdepth_min_child_weights_1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
